This notebook creates a Bag of Words embedding of the data set

library(magrittr)
package ‘magrittr’ was built under R version 4.0.5
library(tidyverse)
package ‘tidyverse’ was built under R version 4.0.5
Registered S3 methods overwritten by 'dbplyr':
  method         from
  print.tbl_lazy     
  print.tbl_sql      
-- Attaching packages --------------------------------------------------------------------------------------------------------------------------- tidyverse 1.3.1 --
v ggplot2 3.3.5     v purrr   0.3.4
v tibble  3.1.3     v dplyr   1.0.7
v tidyr   1.1.3     v stringr 1.4.0
v readr   2.0.1     v forcats 0.5.1
package ‘ggplot2’ was built under R version 4.0.5
package ‘tibble’ was built under R version 4.0.5
package ‘tidyr’ was built under R version 4.0.5
package ‘purrr’ was built under R version 4.0.5
package ‘dplyr’ was built under R version 4.0.5
package ‘stringr’ was built under R version 4.0.5
package ‘forcats’ was built under R version 4.0.5
-- Conflicts ------------------------------------------------------------------------------------------------------------------------------ tidyverse_conflicts() --
x tidyr::extract()   masks magrittr::extract()
x dplyr::filter()    masks stats::filter()
x dplyr::lag()       masks stats::lag()
x purrr::set_names() masks magrittr::set_names()
library(caret)
package ‘caret’ was built under R version 4.0.5
Loading required package: lattice
Registered S3 method overwritten by 'data.table':
  method           from
  print.data.table     

Attaching package: ‘caret’

The following object is masked from ‘package:purrr’:

    lift
library(tictoc)
package ‘tictoc’ was built under R version 4.0.5
source("../bow_creation.R")
package ‘tm’ was built under R version 4.0.5
Loading required package: NLP

Attaching package: ‘NLP’

The following object is masked from ‘package:ggplot2’:

    annotate
source("./parameters.R")

# Rich error reporting
# Install a global error handler: on an uncaught error it prints a compact
# traceback and, when R is not running interactively, ends the session with a
# non-zero exit status.
options(error = function() {
  # Divert regular output to stderr while the handler runs
  sink(stderr())
  # Remove that diversion again when the handler returns
  on.exit(sink(NULL))
  # Print the current call stack, skipping 3 calls and showing at most one
  # line per call
  traceback(3, max.lines = 1L)
  if (!interactive()) {
    # Batch run: quit and signal the failure through exit status 1
    q(status = 1)
  }
})

Parameters

These are the main parameters used for the generation of the Bag of Words.

# Sampling applied to the data set to keep computing times manageable:
# either "balanced" or a number of lines to draw at random
lines_sampled <- "balanced"

# A word must occur at least this many times in the corpus to enter the bag of words
min_word_occurence <- 100

# An nGram must occur at least this many times in the corpus to enter the bag of words
min_ngram_occurence <- 1000

# Size of the nGrams to use (bigrams, trigrams, more?)
nGrams <- 2

# Tag of the weighting function applied to the Bag of Words
# Accepted values: "bin", "tf", "tfidf"
weighting <- "tfidf"

# Language the sentences are written in ("en", "fr"...)
language <- "en"

# Correlation cutoff: features correlated beyond it are deleted
cutoff <- 1

# Cheat switch: remove every line whose Bag of Words entries are all zeros?
remove_zeros <- TRUE

Open the cleaned data set

# Initialize a time counter for the whole run
tic("Time to run all")
# Open the CSV (`col_types` is not defined in this chunk; it is expected to
# come from one of the sourced files)
df <- read_csv("cleaned.csv", col_types = col_types)
# For the purpose of speeding the experimentation we will work on a sample of the data frame only
set.seed(42)
if (identical(lines_sampled, "balanced")) {
  # Split the data set between toxic and non-toxic
  df_toxic <- df[df$toxic == 1, ]
  df_ok <- df[df$toxic == 0, ]
  # Since non toxic is around 10 times bigger than toxic, sample it to the same size
  df_ok_sampled <- df_ok[sample(nrow(df_ok), nrow(df_toxic)), ]
  # Merge back the two data frames per row
  df <- bind_rows(df_ok_sampled, df_toxic)
} else {
  # Any other value must be a single number. Coerce explicitly so that a
  # character value is never compared to 0 as a string.
  n_requested <- suppressWarnings(as.numeric(lines_sampled))
  if (length(n_requested) != 1L || is.na(n_requested)) {
    stop(
      "`lines_sampled` must be \"balanced\" or a single number, got: ",
      paste(format(lines_sampled), collapse = ", "),
      call. = FALSE
    )
  }
  if (n_requested > 0) {
    # Never ask for more rows than available: sample() without replacement
    # would fail otherwise
    df <- df[sample(nrow(df), min(n_requested, nrow(df))), ]
  }
  # Zero or a negative number keeps the whole data set
}
df

Create the bag of words

Get the weighting function to use according to the tag

# Possible weighting functions, keyed by the tag accepted in `weighting`.
# Built as a named list directly, so it holds exactly one entry per tag
# (pre-allocating three slots and then assigning by name left three unnamed
# NULL entries in front of the named ones).
weighting_functions <- list(
  bin = weightBin,
  tf = weightTf,
  tfidf = weightTfIdf
)

# Fail early on an unknown tag instead of handing a NULL entry to bag_of_words()
if (length(weighting) != 1L || !(weighting %in% names(weighting_functions))) {
  stop(
    "`weighting` must be one of: ",
    paste(names(weighting_functions), collapse = ", "),
    call. = FALSE
  )
}

# Single brackets are kept on purpose: the result is a one-element named list,
# which is what this notebook has always passed to bag_of_words().
# NOTE(review): confirm that bag_of_words() expects this list rather than the
# bare function before switching to `[[`.
weighting_function <- weighting_functions[weighting]

# weighting_function

Run the function to get a bag of words

# Time the construction of the bag of words from the "comment_text" column
tic("Bag of words creation")
bow <- bag_of_words(
  df,
  "comment_text",
  min_word_occurence,
  min_ngram_occurence,
  nGrams,
  weighting_function,
  language
)
[1] "VCorpus:"
<<VCorpus>>
Metadata:  corpus specific: 0, document level (indexed): 0
Content:  documents: 40950
[1] "Dimensions of the words matrice:"
[1] 40950  1318
[1] "Dimensions of the nGrams matrice:"
[1] 40950    90
[1] "Dimensions of the features matrice:"
[1] 40950  1408
# Stop the most recently started timer and print the elapsed time
toc()
Bag of words creation: 138.19 sec elapsed
# Display the bag of words
bow

Store the original length of the BOW

# Remember how many columns the bag of words has before any pruning
bow_original_length <- ncol(bow)
bow_original_length
[1] 1408

Find and delete highly correlated variables

# Announce the correlation search and the number of features it covers
writeLines(
  paste0(
    "Looking for correlations with cutoff: ", cutoff,
    " on ", bow_original_length, " features."
  )
)
Looking for correlations with cutoff: 1 on 1408 features.
# Time the computation of the correlation matrix between the columns of the
# bag of words
tic("Build the correlations matrix:")
cor_mat <- cor(bow)
toc()
Build the correlations matrix:: 56.32 sec elapsed

Find correlations

# Time the search for features correlated beyond the cutoff; the result is
# used further down as a set of column indices to drop
tic("Find correlations:")
highlyCor <- findCorrelation(
  cor_mat,
  cutoff = cutoff,
  verbose = FALSE,
  exact = TRUE
)
toc()
Find correlations:: 1.1 sec elapsed
# Report how many features were flagged
qty_of_hc <- length(highlyCor)
writeLines(
  paste0("Number of highly correlated features found: ", qty_of_hc)
)
Number of highly correlated features found: 0
# Start from the full bag of words; flagged columns are removed below, if any
pruned_bow <- bow

# Delete if needed
if (qty_of_hc > 0) {
  # drop = FALSE keeps the two-dimensional structure even when a single
  # column survives the pruning (otherwise the result could collapse to a
  # plain vector and the column count below would be lost)
  pruned_bow <- bow[, -as.vector(highlyCor), drop = FALSE]
} else {
  print("No features removed")
}
[1] "No features removed"
# Record and report the number of columns left after pruning
bow_final_length <- ncol(pruned_bow)
writeLines(
  paste0("Remaning features: ", bow_final_length)
)
Remaning features: 1408

Build the data set

# Give every original column a "df_" prefix so that it cannot clash with a
# word used as a column name in the BoW
prefixed_df_cols <- paste("df", colnames(df), sep = "_")
colnames(df) <- prefixed_df_cols
# Put the original columns and the pruned bag of words side by side
df_bow <- bind_cols(df, pruned_bow)
# Drop rows containing NA values, just in case
df_bow <- drop_na(df_bow)
# Display the result
df_bow

Here we can remove the rows that are full of zeros. This is clearly cheating, as we prune the test set too, but it avoids having to deal with perturbations in some algorithms.

# Flag the rows that have at least one non-zero bag-of-words value.
# The feature columns are the ones that follow the original `df` columns in
# `df_bow`; their position is derived from `ncol(df)` instead of a hard-coded
# count, so the check stays correct if the input data set gains or loses a
# column.
feature_cols <- setdiff(seq_len(ncol(df_bow)), seq_len(ncol(df)))
# Vectorized row-wise test: count the non-zero cells of each row
non_zero_rows <- rowSums(
  df_bow[, feature_cols, drop = FALSE] != 0,
  na.rm = TRUE
) > 0
writeLines(paste0("Rows full of zeros: ", sum(!non_zero_rows, na.rm = TRUE)))
Rows full of zeros: 239
# Keep only the flagged rows when the removal of all-zero rows is enabled
if (remove_zeros == TRUE) {
  df_bow <- df_bow[non_zero_rows, ]
  writeLines(
    paste0("Remaning rows: ", nrow(df_bow))
  )
}
Remaning rows: 40707

Store the resulting dataset

# Drop rows containing NA values, just in case
df_bow <- drop_na(df_bow)
# Assemble the output file name from the parameters of this run
df_bow_name <- sprintf(
  "bow_%s__min_words_%s_%sgrams_%s__sampling_%s__cor_cut_%s_from_%s_to_%s",
  weighting, min_word_occurence, nGrams, min_ngram_occurence,
  lines_sampled, cutoff, bow_original_length, bow_final_length
)
# Mark in the name that the lines full of zeros were removed
if (remove_zeros == TRUE) {
  df_bow_name <- paste0(df_bow_name, "_rm0")
}
# Append the file extension
df_bow_name <- paste0(df_bow_name, ".csv")
# Save the data set under that name
write_csv(df_bow, file = df_bow_name)
# Display the final result
df_bow
# Print the current time so that the rendered notebook shows when the last
# full run finished
writeLines(paste0("Finished on: ", Sys.time()))
Finished on: 2021-08-24 13:38:25
# Print the name of the CSV file written by this run
writeLines(paste0("Created file: ", df_bow_name))
Created file: bow_tfidf__min_words_100_2grams_1000__sampling_balanced__cor_cut_1_from_1408_to_1408_rm0.csv
# Stop the remaining timer and print the elapsed time
toc()
Time to run all: 223.36 sec elapsed
LS0tDQp0aXRsZTogIkJ1aWxkIEJhZyBvZiBXb3JkcyBlbWJlZGRpbmciDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpUaGlzIG5vdGVib29rIGNyZWF0ZXMgYSBCYWcgb2YgV29yZHMgZW1iZWRkaW5nIG9mIHRoZSBkYXRhIHNldA0KDQoNCmBgYHtyfQ0KbGlicmFyeShtYWdyaXR0cikNCmxpYnJhcnkodGlkeXZlcnNlKQ0KbGlicmFyeShjYXJldCkNCmxpYnJhcnkodGljdG9jKQ0Kc291cmNlKCIuLi9ib3dfY3JlYXRpb24uUiIpDQpzb3VyY2UoIi4vcGFyYW1ldGVycy5SIikNCg0KIyBSaWNoIGVycm9yIHJlcG9ydGluZw0Kb3B0aW9ucyhlcnJvciA9IGZ1bmN0aW9uKCkgew0KICBzaW5rKHN0ZGVycigpKQ0KICBvbi5leGl0KHNpbmsoTlVMTCkpDQogIHRyYWNlYmFjaygzLCBtYXgubGluZXMgPSAxTCkNCiAgaWYgKCFpbnRlcmFjdGl2ZSgpKSB7DQogICAgcShzdGF0dXMgPSAxKQ0KICB9DQp9KQ0KYGBgDQoNCiMgUGFyYW1ldGVycw0KDQpUaGVzZSBhcmUgdGhlIG1haW4gcGFyYW1ldGVycyB1c2VkIGZvciB0aGUgZ2VuZXJhdGlvbiBvZiB0aGUgQmFnIG9mIFdvcmRzLg0KDQpgYGB7cn0NCiMgTnVtYmVyIG9mIGxpbmVzIHNhbXBsZWQgZnJvbSB0aGUgZGF0YSBzZXQgKHRvIHJlZHVjZSBjb21wdXRpbmcgdGltZXMgZHVyaW5nIHRoZSBleGVyY2lzZSkNCmxpbmVzX3NhbXBsZWQgPSAiYmFsYW5jZWQiDQoNCiMgTWluaW1hbCBudW1iZXIgb2Ygb2NjdXJyZW5jZXMgb2YgYSB3b3JkIGluIHRoZSBjb3JwdXMgdG8gYmUgdGFrZW4gaW50byB0aGUgYmFnIG9mIHdvcmRzDQptaW5fd29yZF9vY2N1cmVuY2UgPSAxMDANCg0KIyBNaW5pbWFsIG51bWJlciBvZiBvY2N1cnJlbmNlcyBvZiBhIG5HcmFtIGluIHRoZSBjb3JwdXMgdG8gYmUgdGFrZW4gaW50byB0aGUgYmFnIG9mIHdvcmRzDQptaW5fbmdyYW1fb2NjdXJlbmNlID0gMTAwMA0KDQojIFNoYWxsIHdlIHVzZSBiaWdyYW1zLCB0cmlncmFtcywgbW9yZT8NCm5HcmFtcz0yDQoNCiMgV2VpZ2h0aW5nIGZ1bmN0aW9uIHVzZWQgZm9yIHRoZSBCYWcgb2YgV29yZHMNCiMgUG9zc2libGUgdmFsdWVzOiAiYmluIiwgInRmIiwgInRmaWRmIg0Kd2VpZ2h0aW5nID0gInRmaWRmIg0KDQojIExhbmd1YWdlIG9mIHRoZSBzZW50ZW5jZXMgKCJlbiIsICJmciIuLi4pDQpsYW5ndWFnZT0iZW4iDQoNCiMgQ3V0b2ZmIGZvciB0aGUgcmF0aW8gb2YgY29ycmVsYXRpb24uIEZlYXR1cmVzIG92ZXItY29ycmVsYXRlZCBhcmUgZGVsZXRlZC4NCmN1dG9mZiA9IDAuMQ0KDQojIFNoYWxsIHdlIGNoZWF0IGFuZCByZW1vdmUgYWxsIGxpbmVzIGZ1bGwgb2YgemVyb3MgaW4gdGhlIEJhZyBvZiBXb3Jkcz8NCnJlbW92ZV96ZXJvcyA9IFRSVUUNCg0KYGBgDQoNCg0KDQojIE9wZW4gdGhlIGNsZWFuZWQgZGF0YSBzZXQNCg0KYGBge3J9DQojIEluaXRpYWxpemUgYSB0aW1lIGNvdW50ZXINCnRpYygiVGltZSB0byBydW4gYWxsIikNCiMgT3BlbiB0aGUgQ1NWDQpkZiA9IHJlYWRfY3N2KCJjbGVhbmVkLmNzdiIsY29s
X3R5cGVzPWNvbF90eXBlcykNCiMgRm9yIHRoZSBwdXJwb3NlIG9mIHNwZWVkaW5nIHRoZSBleHBlcmltZW50YXRpb24gd2Ugd2lsbCB3b3JrIG9uIGEgc2FtcGxlIG9mIHRoZSBkYXRhIGZyYW1lIG9ubHkNCnNldC5zZWVkKDQyKQ0KaWYgKGxpbmVzX3NhbXBsZWQgPT0gImJhbGFuY2VkIikgew0KICAjIFNwbGl0IHRoZSBkYXRhIHNldCBiZXR3ZWVuIHRveGljIGFuZCBub24tdG94aWMNCiAgZGZfdG94aWMgPSBkZltkZiR0b3hpYyA9PSAxLF0NCiAgZGZfb2sgPSBkZltkZiR0b3hpYyA9PSAwLF0NCiAgIyBTaW5jZSBub24gdG94aWMgaXMgYXJvdW5kIDEwIHRpbWUgYmlnZ2VyIHRoYW4gdG94aWMsIHNhbXBsZSBpdCB0byB0aGUgc2FtZSBzaXplDQogIGRmX29rX3NhbXBsZWQgPSBkZl9va1tzYW1wbGUobnJvdyhkZl9vayksIG5yb3coZGZfdG94aWMpKSwgXQ0KICAjIE1lcmdlIGJhY2sgdGhlIHR3byBkYXRhIGZyYW1lcyBwZXIgcm93DQogIGRmID0gYmluZF9yb3dzKGRmX29rX3NhbXBsZWQsZGZfdG94aWMpDQp9IGVsc2UgaWYgKGxpbmVzX3NhbXBsZWQgPiAwKSB7DQogIGRmID0gZGZbc2FtcGxlKG5yb3coZGYpLCBsaW5lc19zYW1wbGVkKSwgXQ0KfQ0KZGYNCmBgYA0KDQojIENyZWF0ZSB0aGUgYmFnIG9mIHdvcmRzDQoNCkdldCB0aGUgd2VpZ2h0aW5nIGZ1bmN0aW9uIHRvIHVzZSBhY2NvcmRpbmcgdG8gdGhlIHRhZw0KDQpgYGB7cn0NCiMgUG9zc2libGUgd2VpZ2h0aW5nIGZ1bmN0aW9ucw0Kd2VpZ2h0aW5nX2Z1bmN0aW9ucyA9IHZlY3Rvcihtb2RlPSJsaXN0IiwgbGVuZ3RoPTMpDQp3ZWlnaHRpbmdfZnVuY3Rpb25zJGJpbiA9IHdlaWdodEJpbg0Kd2VpZ2h0aW5nX2Z1bmN0aW9ucyR0ZiA9IHdlaWdodFRmDQp3ZWlnaHRpbmdfZnVuY3Rpb25zJHRmaWRmID0gd2VpZ2h0VGZJZGYNCg0KDQp3ZWlnaHRpbmdfZnVuY3Rpb24gPSB3ZWlnaHRpbmdfZnVuY3Rpb25zW3dlaWdodGluZ10NCg0KIyB3ZWlnaHRpbmdfZnVuY3Rpb24NCmBgYA0KDQpSdW4gdGhlIGZ1bmN0aW9uIHRvIGdldCBhIGJhZyBvZiB3b3Jkcw0KDQpgYGB7cn0NCiMgQnVpbGQgdGhlIGJhZyBvZiB3b3Jkcw0KdGljKCJCYWcgb2Ygd29yZHMgY3JlYXRpb24iKQ0KYm93ID0gYmFnX29mX3dvcmRzKGRmLCJjb21tZW50X3RleHQiLG1pbl93b3JkX29jY3VyZW5jZSxtaW5fbmdyYW1fb2NjdXJlbmNlLG5HcmFtcyx3ZWlnaHRpbmdfZnVuY3Rpb24sbGFuZ3VhZ2UpDQp0b2MoKQ0KYm93DQpgYGANCg0KU3RvcmUgdGhlIG9yaWdpbmFsIGxlbmd0aCBvZiB0aGUgQk9XDQoNCmBgYHtyfQ0KYm93X29yaWdpbmFsX2xlbmd0aCA9IGRpbShib3cpWzJdDQpib3dfb3JpZ2luYWxfbGVuZ3RoDQpgYGANCg0KDQojIEZpbmQgYW5kIGRlbGV0ZSBoaWdobHkgY29ycmVsYXRlZCB2YXJpYWJsZXMNCg0KDQpgYGB7cn0NCndyaXRlTGluZXMocGFzdGUwKCJMb29raW5nIGZvciBjb3JyZWxhdGlvbnMgd2l0aCBjdXRvZmY6ICIsIGN1dG9mZiwgIiBvbiAiLCBib3dfb3JpZ2luYWxfbGVuZ3RoLCAi
IGZlYXR1cmVzLiIpKQ0KDQojIEZpbmQgdGhlIGNvcnJlbGF0ZWQgY29sdW1ucw0KdGljKCJCdWlsZCB0aGUgY29ycmVsYXRpb25zIG1hdHJpeDoiKQ0KIyBCdWlsZCB0aGUgY29ycmVsYXRpb24gbWF0cml4DQpjb3JfbWF0ID0gY29yKGJvdykNCnRvYygpDQpgYGANCg0KDQpGaW5kIGNvcnJlbGF0aW9ucw0KDQpgYGB7cn0NCnRpYygiRmluZCBjb3JyZWxhdGlvbnM6IikNCiMgRmluZCBjb3JyZWxhdGlvbnMgdXNpbmcgaXQNCmhpZ2hseUNvciA9IGZpbmRDb3JyZWxhdGlvbihjb3JfbWF0LCBjdXRvZmYgPSBjdXRvZmYsIHZlcmJvc2UgPSBGQUxTRSwgZXhhY3QgPSBUUlVFKQ0KdG9jKCkNCg0KIyBDb3VudCBpdCBmb3IgaW5mbw0KcXR5X29mX2hjID0gbGVuZ3RoKGhpZ2hseUNvcikNCndyaXRlTGluZXMocGFzdGUwKCJOdW1iZXIgb2YgaGlnaGx5IGNvcnJlbGF0ZWQgZmVhdHVyZXMgZm91bmQ6ICIsIHF0eV9vZl9oYykpDQoNCnBydW5lZF9ib3cgPSBib3cNCg0KIyBEZWxldGUgaWYgbmVlZGVkDQppZiAocXR5X29mX2hjID4gMCkgew0KICBwcnVuZWRfYm93ID0gYm93WywtYXMudmVjdG9yKGhpZ2hseUNvcildDQp9IGVsc2Ugew0KICBwcmludCgiTm8gZmVhdHVyZXMgcmVtb3ZlZCIpDQp9DQoNCiMgU3RvcmUgdGhlIGRpbWVuc2lvbnMgb2YgdGhlIEJvdyBhZnRlcndhcmRzDQpib3dfZmluYWxfbGVuZ3RoID0gZGltKHBydW5lZF9ib3cpWzJdDQp3cml0ZUxpbmVzKHBhc3RlMCgiUmVtYW5pbmcgZmVhdHVyZXM6ICIsIGJvd19maW5hbF9sZW5ndGgpKQ0KYGBgDQoNCg0KDQojIEJ1aWxkIHRoZSBkYXRhIHNldA0KDQpgYGB7cn0NCiMgUHJlZml4IHRoZSBjb2x1bW4gbmFtZXMgb2YgdGhlIG9yaWdpbmFsIGRmIHRvIGF2b2lkIGNvbGxpc2lvbiB3aXRoIHdvcmRzIGZyb20gQm9XDQpwcmVmaXhlZF9kZl9jb2xzID0gcGFzdGUoImRmIiwgY29sbmFtZXMoZGYpLCBzZXAgPSAiXyIpDQpjb2xuYW1lcyhkZikgPSBwcmVmaXhlZF9kZl9jb2xzDQojIEJpbmQgdGhlIG9yaWdpbmFsIGRhdGEgZnJhbWUgYW5kIHRoZSBib3cNCmRmX2JvdyA9IGJpbmRfY29scyhkZixwcnVuZWRfYm93KQ0KIyBDbGVhbiBOQSB2YWx1ZXMsIGluIGNhc2Ugb2YNCmRmX2JvdyAlPD4lIGRyb3BfbmEoKQ0KIyBzaG93DQpkZl9ib3cNCmBgYA0KDQpIZXJlIHdlIGNhbiBkZWFjdGl2YXRlIHJvd3MgZnVsbCBvZiB6ZXJvcy4NClRoaXMgaXMgYSBjbGVhcmx5IGNoZWF0aW5nIGFzIHdlIHBydW5lIHRoZSB0ZXN0IHNldCB0b28uDQpUaGlzIGF2b2lkcyB0byBkZWFsIHdpdGggcGVydHVyYmF0aW9ucyBvbiBzb21lIGFsZ29yaXRobXMuDQoNCmBgYHtyfQ0KIyBHbyB0aHJvdWdoIGVhY2ggcm93LCByZXR1cm4gVFJVRSBpcyBhdCBsZWFzdCBvbmUgdmFsdWUgaXMgbm90IHplcm8NCm5vbl96ZXJvX3Jvd3MgPSBhcHBseShkZl9ib3dbLC0oMTo5KV0sIDEsIGZ1bmN0aW9uKHJvdykgYW55KHJvdyAhPTAgKSkNCndyaXRlTGluZXMocGFzdGUwKCJSb3dzIGZ1bGwgb2YgemVyb3M6ICIsc3Vt
KCFub25femVyb19yb3dzLCBuYS5ybSA9IFRSVUUpKSkNCiMgU3Vic2V0DQppZiAocmVtb3ZlX3plcm9zID09IFRSVUUpIHsNCiAgZGZfYm93ID0gZGZfYm93W25vbl96ZXJvX3Jvd3MsXQ0KICB3cml0ZUxpbmVzKHBhc3RlMCgiUmVtYW5pbmcgcm93czogIixkaW0oZGZfYm93KVsxXSkpDQp9DQpgYGANCg0KIyBTdG9yZSB0aGUgcmVzdWx0aW5nIGRhdGFzZXQNCg0KDQpgYGB7cn0NCiMgQ2xlYW4gTkEgdmFsdWVzLCBpbiBjYXNlIG9mDQpkZl9ib3cgJTw+JSBkcm9wX25hKCkNCiMgRGVmaW5lIGEgZmlsZSBuYW1lIHdpdGggcGFyYW1ldHJpYyB2YWx1ZXMNCmRmX2Jvd19uYW1lID0gc3ByaW50ZigiYm93XyVzX19taW5fd29yZHNfJXNfJXNncmFtc18lc19fc2FtcGxpbmdfJXNfX2Nvcl9jdXRfJXNfZnJvbV8lc190b18lcyIsDQogICAgICAgICAgICAgICAgICAgICAgd2VpZ2h0aW5nLA0KICAgICAgICAgICAgICAgICAgICAgIG1pbl93b3JkX29jY3VyZW5jZSwNCiAgICAgICAgICAgICAgICAgICAgICBuR3JhbXMsDQogICAgICAgICAgICAgICAgICAgICAgbWluX25ncmFtX29jY3VyZW5jZSwNCiAgICAgICAgICAgICAgICAgICAgICBsaW5lc19zYW1wbGVkLA0KICAgICAgICAgICAgICAgICAgICAgIGN1dG9mZiwNCiAgICAgICAgICAgICAgICAgICAgICBib3dfb3JpZ2luYWxfbGVuZ3RoLA0KICAgICAgICAgICAgICAgICAgICAgIGJvd19maW5hbF9sZW5ndGgNCiAgICAgICAgICAgICAgICAgICAgICApDQojIERpZCB3ZSByZW1vdmVkIHRoZSBsaW5lcyBmdWxsIG9mIHplcm9zPw0KaWYgKHJlbW92ZV96ZXJvcyA9PSBUUlVFKSB7DQogIGRmX2Jvd19uYW1lID0gcGFzdGUoZGZfYm93X25hbWUsInJtMCIsIHNlcCA9ICJfIikNCn0NCiMgQWRkIHRoZSBleHRlbnNpb24NCmRmX2Jvd19uYW1lID0gcGFzdGUoZGZfYm93X25hbWUsImNzdiIsIHNlcCA9ICIuIikNCiMgV3JpdGUgaXQNCndyaXRlX2NzdihkZl9ib3csZmlsZT1kZl9ib3dfbmFtZSkNCiMgY2hlY2sgdGhlIGZpbmFsIHJlc3VsdA0KZGZfYm93DQpgYGANCg0KDQpgYGB7cn0NCiMgVGltZXN0YW1wIHRvIGtub3cgd2hlbiB3YXMgdGhlIGxhc3QgZnVsbCBydW4NCndyaXRlTGluZXMocGFzdGUwKCJGaW5pc2hlZCBvbjogIiwgU3lzLnRpbWUoKSkpDQp3cml0ZUxpbmVzKHBhc3RlMCgiQ3JlYXRlZCBmaWxlOiAiLCBkZl9ib3dfbmFtZSkpDQp0b2MoKQ0KYGBgDQoNCg==